/* clean dataset */
option compress=yes;
proc datasets lib=work nolist kill; run; quit;
DM "log; clear; ";
DM "odsresults; clear";

/* library */
libname sprd "This is the path for just downloaded spread";
libname link "This is the path for the link file between crsp and taq";
libname crsp "This is the path for crsp";


*============================================================================;
*step 1.1: NASDAQ, after 2015
*============================================================================;

proc sql;
 create table nasdaq_after2015 as
 select a.DATE, a.time, a.symbol, a.BEST_BID, a.BEST_ASK, a.spread, a.CUSIP, b.permno
 from sprd.nasdaq_2015to22 as a left join link.taqmclink_cusip_2010 as b
 on a.cusip = b.cusip and a.date >= b.date1 and a.date <= b.date2
 where permno is not missing
 order by a.date, a.symbol, a.time
;
quit;

proc sql;
 create table nasdaq_after2015_1 as
 select a.*, b.SHRCD, b.EXCHCD, (b.SHROUT*b.CFACSHR) as SHROUT_ADJ
 from nasdaq_after2015 as a left join crsp.nasdaq_shrout as b
 on a.permno = b.permno and a.date = b.date
 where calculated shrout_adj is not missing
 order by a.date, a.symbol, a.time
;
quit; 


*============================================================================;
*step 1.2: NASDAQ, before 2015
*============================================================================;

data nasdaq_before2014; 
 set sprd.nasdaq_1993to95 sprd.nasdaq_1996 sprd.nasdaq_1997to200406
     sprd.nasdaq_2004to0412 sprd.nasdaq_2005to14;
run;

proc sql;
 create table nasdaq_before2014_1 as
 select a.DATE, a.time, a.symbol, a.BEST_BID, a.BEST_ASK, a.spread, a.CUSIP, b.permno
 from nasdaq_before2014 as a left join link.tclink as b
 on a.cusip = b.cusip_full and year(a.date) = year(b.date) and month(a.date) = month(b.date)
 where permno is not missing and a.date >= b.fdate
 order by a.date, a.symbol, a.time
;
quit;

proc sql;
 create table nasdaq_before2014_2 as
 select a.*, b.SHRCD, b.EXCHCD, (b.SHROUT*b.CFACSHR) as SHROUT_ADJ
 from nasdaq_before2014_1 as a left join crsp.nasdaq_shrout as b
 on a.permno = b.permno and a.date = b.date
 where calculated shrout_adj is not missing
 order by a.date, a.symbol, a.time
;
quit;


*============================================================================;
*step 1.3: NASDAQ, merge before and after 2015
*============================================================================;

data nasdaq_spread; set nasdaq_before2014_2 nasdaq_after2015_1; spread_ab = BEST_ASK - BEST_BID; run; 

/* clear more space on disk */
proc datasets nolist lib=work; 
 delete nasdaq_after2015 nasdaq_after2015_1 
        nasdaq_before2014 nasdaq_before2014_1 nasdaq_before2014_2; 
run; quit;
 

*============================================================================;
*step 2: NASDAQ, averge spread
*============================================================================;

proc sort data = nasdaq_spread; by date time permno; run;
proc means data= nasdaq_spread noprint;
by date time;
where shrcd in (10 11);
var spread;
output out = _nasdaq_1 (drop=_TYPE_ _FREQ_) N=obs MEAN=spread STD=spread_std;
run;

proc means data= _nasdaq_1 noprint;
where spread <= 0.10;
by time;
var spread spread_std;
output out = _nasdaq_out_1 (drop=_TYPE_ _FREQ_) N=obs MEAN=;
run;

proc means data= nasdaq_spread noprint;
by date time;
where shrcd in (10 11);
var spread_ab;
output out = _nasdaq_2 (drop=_TYPE_ _FREQ_) N=obs MEAN=spread_ab STD=spread_ab_std;
run;

proc means data= _nasdaq_2 noprint;
where spread_ab <= 10;
by time;
var spread_ab spread_ab_std;
output out = _nasdaq_out_2 (drop=_TYPE_ _FREQ_) N=obs MEAN=;
run;
